library(tidyverse)

#***************#
#   Load Data   #
#***************#

# Getting the data from Harvard dataverse
# https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/H6RZA7

#Set Working Directory

path <- "U:/Replication data/dataverse_files"

setwd(path)

re_data <- read.csv2("re_data.csv", sep = ",")
fe_data <- read.csv2("fe_data.csv", sep = ",")

re_data[,6:16] <- sapply(sapply(re_data[,6:16],as.character), as.numeric)
fe_data[,6:16] <- sapply(sapply(fe_data[,6:16],as.character), as.numeric)

sorted_dataframe <- arrange(fe_data, account_id)

Sys.setlocale("LC_TIME", "C") #changes the date format from Danish to English
options(scipen=999)

##################
#### Figure 2 ####
##################

###Total followers###

timeplot_temporary <- data.frame(time = (c(1:127)),
                                 followers = double(length(1:127)),
                                 date = seq(as.Date("2016-03-16", format = "%Y-%m-%d"), as.Date("2016-07-20", format = "%Y-%m-%d"), by = 1),
                                 stringsAsFactors = FALSE)

follow_count_df <- sorted_dataframe %>%
  group_by(time) %>%
  summarise(fol_users = sum(followers_count, na.rm = TRUE))

timeplot <- left_join(timeplot_temporary, follow_count_df, by = c("time" = "time"))


# Total followers plot
(timeplot_fol_users <- ggplot(timeplot, aes(timeplot$date, timeplot$fol_users/10000, na.rm=TRUE)) +
  geom_line() +
  #how it looks:
  theme_bw() +
  ggtitle("Total IS Twitter Follower Count over Time") +
  xlab("Date") +
  ylab("Number of IS Twitter followers (10,000)") +
  geom_vline(xintercept = as.numeric(as.Date("2016-03-22")), size=2,linetype=1, colour="grey") +
  geom_vline(xintercept = as.numeric(as.Date("2016-07-14")), size=2, linetype=1, colour="grey"))


###Mean users (total divided by unique users)###
twoway_table <- as.data.frame(table(sorted_dataframe$account_id, sorted_dataframe$time))

colnames(twoway_table) <- c("user_ID", "day", "frequency")

twoway_table$day <- as.character(twoway_table$day) %>%
  as.numeric()

# Binding missing days to data frame
add_missing_days <- tribble(~user_ID, ~day, ~frequency,
                            NA, 72, NA, 
                            NA, 73, NA, 
                            NA, 74, NA, 
                            NA, 75, NA,
                            NA, 76, NA, 
                            NA, 77, NA, 
                            NA, 78, NA,
                            NA, 79, NA, 
                            NA, 80, NA, 
                            NA, 81, NA, 
                            NA, 82, NA, 
                            NA, 83, NA,
                            NA, 84, NA,
                            NA, 85, NA,
                            NA, 86, NA, 
                            NA, 87, NA,
                            NA, 89, NA, 
                            NA, 90, NA, 
                            NA, 91, NA, 
                            NA, 92, NA)

twoway_table <- rbind(twoway_table, add_missing_days)

twoway_table <- twoway_table %>%
  group_by(day) %>%
  summarise(unique_users = sum(frequency, na.rm = TRUE))

twoway_table <- left_join(twoway_table, timeplot, by = c("day" = "time"))

twoway_table$mean_users <- twoway_table$fol_users/twoway_table$unique_users

# Mean followers plot

(timeplot_mean_users <- ggplot(twoway_table, aes(twoway_table$date, twoway_table$mean_users, na.rm=TRUE)) +
  geom_line() +
  #how it looks:
  theme_bw() +
  ggtitle("Mean IS Twitter Follower Count over Time") +
  xlab("Date") +
  ylab("Mean number of IS Twitter followers") +
  geom_vline(xintercept = as.numeric(as.Date("2016-03-22")), size=2,linetype=1, colour="grey") +
  geom_vline(xintercept = as.numeric(as.Date("2016-07-14")), size=2, linetype=1, colour="grey") +
  ylim(0,450)) 

###########################
###Recalculated p-values###
###########################

# Z-scores for Brussels attack - one-sided tests
z_1 <- -22.8 / 14.3
z_2 <- -23.2 / 14.3
z_3 <- -18.4 / 10.9
z_4 <- -14.3 / 9.8

pnorm(c(z_1, z_2, z_3, z_4))

# Z scores for Brussels attack - two-sided tests
#p-values, model 1
m1_one_sided <- pnorm(z_1)
(m1_two_sided <- m1_one_sided*2)

#p-values, model 2
m2_one_sided <- pnorm(z_2)
(m2_two_sided <- m2_one_sided*2)

#p-values, model 3
m3_one_sided <- pnorm(z_3)
(m3_two_sided <- m3_one_sided*2)

#p-values, model 1
m4_one_sided <- pnorm(z_4)
(m4_two_sided <- m4_one_sided*2)

# Z-scores for Nice attack - one-sided tests
z_1N <- -52.5 / 15.2
z_2N <- -115.5 / 24.0
z_3N <- -30.3 / 15.5
z_4N <- -15.2 / 11.3

pnorm(c(z_1N, z_2N, z_3N, z_4N))

# Z scores for Nice attack - two-sided tests
#p-values, model 1
m1_one_sidedN <- pnorm(z_1N)
(m1_two_sidedN <- m1_one_sidedN*2)

#p-values, model 2
m2_one_sidedN <- pnorm(z_2N)
(m2_two_sidedN <- m2_one_sidedN*2)

#p-values, model 3
m3_one_sidedN <- pnorm(z_3N)
(m3_two_sidedN <- m3_one_sidedN*2)

#p-values, model 1
m4_one_sidedN <- pnorm(z_4N)
(m4_two_sidedN <- m4_one_sidedN*2)







